#install.packages("rlang")
#library(rlang)
library(tidyverse)
library(haven)
library(formatR)
library(lubridate)
library(smooth)
library(forecast)
library(scales)
library(kableExtra)
library(ggplot2)
library(readxl)
library(tidyverse)
library(data.table)
library(quantmod)
library(geofacet)
library(janitor)


knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE
                      )

Inspecting new FY22 data files

Data files for closed years have been obtained from IOC.

Revenue File:
- 684 Fund Numbers
- 80 Agencies
- 1185 Revenue sources

Expenditure File:
- 708 Fund Numbers
- 107 Agencies
- 98 Division Numbers
- 313 Division names

Data Creation and Cleaning

  • Do the FOIA request. In a week or so, they send the expenditure and revenue data as excel files.
  • Checks whether there are any new agencies, re-used funds etc. Create a list of funds, agencies, fund names, etc. for the new year and compare it to the immediate prior year to identify new funds.
  • Update the funds_ab_in file which shows the use of funds. Use criteria to determine if the new funds should be in or out of the all-funds frame.
  • Then, download the excel files that are sent to you.
  • Open and change the names to be consistent with other files such as AGENCYNAME–> agency_name

Combine past years: All revenue files are in a revenue folder that I reference when I set the working directory. When adding new fiscal years, put the the newest year of data for revenue and expenditures in their respective folders.

The code chunk below takes the .dta files for all fiscal years before FY 2022 and binds them together. Variable names were manually changed by past researchers so that they were consistent across years.

Reads in dta file and leaves fund as a character. No longer have to worry about preserving leading zeros in categories like the fund numbers. State code used to force fund, source, and from_fund to be 4 digits long and preserve leading zeros and fund was 3 digits long with leading zeros.

Code below reads in the csv files created in chunks above (allrevfiles.csv and allrexpfiles.csv). These files contain all years of data combined into one file BEFORE any recoding is done. Do not use this file for summing categories because it is just an in between step before recoding revenue and expenditure categories.

# combined in past chunks called create-rev-csv and create-exp-csv

allrevfiles22 <- read_csv("allrevfiles22.csv") #combined but not recoded
allexpfiles22 <- read_csv("allexpfiles22.csv") #combined but not recoded

Normally, when your receive the new fiscal year files from the Comptrollers office, you will need to change the variable names so that they are consistent with past years. This is an example of reading in the new file and changing the variable names. They seem to change almost every year in the file received from the FOIA so if the code breaks here, check to make sure that the columns you are trying to rename exist and are spelled correctly!

For FY 2023 and after, .dta files can be avoided entirely and .csv files and R code will be used. All files before this year had been saved and passed on as .dta files for Stata code before the transition to R in Fall 2022

Example code below: Read in excel file and rename columns so that it plays well with the other years’ files.

revenue_fy22 <- read_xlsx("Fis_Fut_Rev_2022_Final.xlsx") %>% 
  rename(fy = 'FY',
         fund = 'FUND',
         fund_name = 'FUND NAME',
         agency = 'AGENCY',
         agency_name = 'AGENCY NAME',
         source = 'REVENUE SOURCE',
         source_name = 'REV SRC NAME',
         receipts = 'AMOUNT'
  ) 

exp_fy22 <- read_xlsx("Fis_Fut_Exp_2022_Final.xlsx") %>% 
  rename(fy = 'FY',
         fund = 'FUND',
         fund_name = 'FUND NAME',
         agency = 'AGENCY',
         agency_name = 'AGENCY NAME',
         appr_org = 'ORGANIZATION',
         org_name = 'ORGANIZATION NAME',
         obj_seq_type = 'APPROPRIATION',
         wh_approp_name = 'APPROPRIATION NAME',
        # exp_net_xfer = 'NET OF TRANS AMOUNT',
         expenditure = 'EXPENDED'
  ) 

# %>%
#   # these come from ioc_source file after merging
#   mutate(data_source = "exp IOC Aug 2022",
#          object = ,
#          seq = ,
#          type = ,
#          fund_cat = FIND_COLUMN, #create fund_cat column
#          fund_cat_name = FIND_NAME) # create fund_cat_name column

Finding new agencies and funds

Identify new and reused funds for newest fiscal year. Recode funds to take into account different fund numbers/names over the years. Update fund_ab_in_2022.xlsx with any changes from previous fiscal year.

Clarify and add steps for identifying new and reused funds.

New Agencies, Funds, and Organizations from Expenditure files:

  • Using the code below, I found 2 agencies, multiple funds, and a couple organizations that had not been specifically mentioned in the funds_ab_in file.
  • One new org name and org number combo has an expenditure of $600 million for pensions payment.
#### From Expenditure Data #####

# agencies referenced in any year before 2020:
agencies_past <- allexpfiles22 %>% filter(fy < 2022) %>% mutate(agency == as.character(agency)) %>% group_by(agency, agency_name) %>% unique() %>% summarize(expenditure = sum(expenditure, na.rm = TRUE)) %>% drop_na() %>% arrange(agency)
# agencies_past # 146 agencies ever


# agencies in 2022 data:
agencies22 <- allexpfiles22  %>% filter(fy == 2022) %>% mutate(agency == as.character(agency)) %>% group_by(agency, agency_name) %>% summarize(expenditure = sum(expenditure, na.rm = TRUE))
# agencies22 # 107 agencies this year


# 280 and 533 are new agency codes: 
anti_join(agencies22, agencies_past, by = c("agency", "agency_name")) %>% arrange(agency)
funds_past <- allexpfiles22  %>% filter(fy < 2022) %>% mutate(fund == as.character(fund)) %>% group_by(fund, fund_name) %>% summarize(count = n(), Expenditure = sum(expenditure, na.rm = TRUE))  %>% drop_na()

funds22 <- allexpfiles22  %>% filter(fy == 2022) %>% mutate(fund == as.character(fund)) %>% group_by(fund, fund_name) %>%  summarize(count = n(), Expenditure = sum(expenditure, na.rm = TRUE)) %>% unique()


# 15 funds were in FY22 data that were not in past data:
anti_join(funds22, funds_past, by = c("fund", "fund_name")) %>% arrange(fund)
# orgs_pastin the past =  916 org groups ever
orgs_past <- allexpfiles22  %>% filter(fy < 2022) %>% mutate(appr_org == as.character(appr_org)) %>% group_by(appr_org, org_name) %>% unique() %>% summarize(Expenditure = sum(expenditure, na.rm = TRUE)) %>% drop_na()

# orgs_past # 916 org groups ever
orgs22 <- allexpfiles22  %>% filter(fy == 2022) %>% mutate(appr_org = as.character(appr_org)) %>% group_by(appr_org, org_name) %>% summarize(Expenditure = sum(expenditure, na.rm = TRUE))
# orgs22 # 396 org groups this year


# 4 org number and org name combos are new for FY2022:
anti_join(orgs22, orgs_past, by = c("appr_org", "org_name")) %>% arrange(appr_org)

New Revenue Funds, Sources, and New Agencies:

#### From Revenue Data ####



# agencies_past # 108 agencies ever
agencies_past <- allrevfiles22  %>% filter(fy < 2022) %>% mutate(agency == as.character(agency)) %>% group_by(agency, agency_name) %>% unique() %>% summarize(Receipts = sum(receipts, na.rm = TRUE)) %>% drop_na()


# agencies22 # 80 agencies this year
agencies22 <- allrevfiles22  %>% filter(fy == 2022) %>% mutate(agency == as.character(agency)) %>% group_by(agency, agency_name) %>% summarize(Receipts = sum(receipts, na.rm = TRUE))

# 0 new agencies in revenue data this year
anti_join(agencies22, agencies_past, by = c("agency", "agency_name")) %>% arrange(agency)
funds_past <- allrevfiles22  %>% filter(fy < 2022) %>% mutate(fund == as.character(fund)) %>% group_by(fund, fund_name) %>% summarize(count = n(), Receipts = sum(receipts, na.rm = TRUE))  %>% drop_na()

funds22 <- allrevfiles22  %>% filter(fy == 2022) %>% mutate(fund == as.character(fund)) %>% group_by(fund, fund_name) %>%  summarize(count = n(), Receipts = sum(receipts, na.rm = TRUE)) %>% unique() %>% drop_na()

# 19 revenue funds were in FY22 revenue data that were not in past data 
# some could be small fund name changes
anti_join(funds22, funds_past, by = c("fund", "fund_name")) %>% arrange(fund)
sources_past <- allrevfiles22  %>% filter(fy < 2022) %>% mutate(source == as.character(source)) %>% group_by(source, source_name) %>% summarize(count = n(), Receipts = sum(receipts, na.rm = TRUE))  %>% drop_na()

sources22 <- allrevfiles22  %>% filter(fy == 2022) %>% mutate(source == as.character(source)) %>% group_by(source, source_name) %>%  summarize(count = n(), Receipts = sum(receipts, na.rm = TRUE)) %>% unique()


# 20 revenue sources were in FY22 data that were not in past data 
# some could be small source name changes:
anti_join(sources22, sources_past, by = c("source", "source_name")) %>% arrange(source)

Sources 2737 through 2756 were not found in the IOC_source file so I added them. They do NOT have a rev_type until the Fiscal Futures researchers discuss which revenue type the sources fall under and if they should be included in the analysis in general.

Recoding New and Reused Funds

For funds that were reused once, a 9 replaces the 0 as the first digit. If reused twice, then the first two values are 10.
- Ex. 0350 –> 9350 because its use changed.
- Ex. 0367 becomes 10367 because its use has changed twice now. There was fund 0367 originally, then its use changed and it was recoded as 9367, and now it changed again so it is a 10367.

# if first character is a 0, replace with a 9

rev_1998_2022 <- allrevfiles22 %>%
      mutate(fund = ifelse(fy < 2002 & fund %in% c("0730", "0241", "0350", "0367", "0381", "0382", "0526", "0603", "0734", "0913", "0379"), str_replace(fund, "0","9"), fund)) %>%
  
  mutate(fund = ifelse(fy < 2008 & fund %in% c("0027", "0033", "0037", "0058", "0062", "0066", "0075", "0083", "0116", "0119", "0120", "0122", "0148", "0149", "0157", "0158", "0166", "0194", "0201", "0209", "0211", "0217", "0223", "0231", "0234", "0253", "0320", "0503", "0505", "0512", "0516", "0531", "0532", "0533", "0547", "0563", "0579", "0591", "0606", "0616", "0624", "0659", "0662", "0665", "0676", "0710", 
"0068", "0076", "0115", "0119", "0168", "0182", "0199", "0241", "0307", "0506", "0509", "0513"), str_replace(fund, "0","9"), fund)) %>%
  
  mutate(fund = ifelse(fy < 2016 & fund %in% c("0263", "0399", "0409"), str_replace(fund, "0","9"), fund)) %>%
  
  mutate(fund =  ifelse(fy < 2017 & fund == "0364", str_replace(fund, "0","9"), fund)) %>%
  
  mutate(fund =  ifelse(fy < 2018 & fund %in% c("0818", "0767", "0671", "0593", "0578"), str_replace(fund, "0","9"), fund)) %>%
  
  mutate(fund = ifelse(fy>1999 & fy < 2018 & fund == "0231", "10231", fund) ) %>%
  
  mutate(fund = ifelse(fy < 2019 & fund %in% c("0161", "0489", "0500", "0612", "0893", "0766"), str_replace(fund, "0","9"), fund)) %>%
  
  mutate(fund =  ifelse(fy < 2020 & fund %in% c("0254", "0304", "0324", "0610", "0887", "0908", "0939", "0968"), str_replace(fund, "0","9"), fund)) %>%
  
  mutate(fund =  ifelse(fy < 2021 & fund %in% c("0255", "0325", "0348", "0967", "0972"), str_replace(fund, "0","9"), fund) ) %>%
  
   #2022 changes
  mutate(fund = ifelse(fy < 2022 & fund %in% c("0110","0165","0351", "0392", "0393", "0422", "0544", "0628", "0634",  "0656", "0672", "0683", "0723", "0742", "0743"), str_replace(fund, "0","9"), as.character(fund))) %>%  # replaces first 0 it finds with a 9
  mutate(fund = ifelse(fy < 2022 & fund == "0367", "10367", as.character(fund)) # fund reused for 3rd time
)

Expenditure recoding:

# if first character is a 0, replace with a 9

exp_1998_2022 <- allexpfiles22 %>%
      mutate(fund = ifelse(fy < 2002 & fund %in% c("0730", "0241", "0350", "0367", "0381", "0382", "0526", "0603", "0734", "0913", "0379"), str_replace(fund, "0","9"), fund)) %>%
  
  mutate(fund = ifelse(fy < 2008 & fund %in% c("0027", "0033", "0037", "0058", "0062", "0066", "0075", "0083", "0116", "0119", "0120", "0122", "0148", "0149", "0157", "0158", "0166", "0194", "0201", "0209", "0211", "0217", "0223", "0231", "0234", "0253", "0320", "0503", "0505", "0512", "0516", "0531", "0532", "0533", "0547", "0563", "0579", "0591", "0606", "0616", "0624", "0659", "0662", "0665", "0676", "0710", 
"0068", "0076", "0115", "0119", "0168", "0182", "0199", "0241", "0307", "0506", "0509", "0513"), str_replace(fund, "0","9"), fund)) %>%
  
  mutate(fund = ifelse(fy < 2016 & fund %in% c("0263", "0399", "0409"), str_replace(fund, "0","9"), fund)) %>%
  
  mutate(fund =  ifelse(fy < 2017 & fund == "0364", str_replace(fund, "0","9"), fund)) %>%
  
  mutate(fund =  ifelse(fy < 2018 & fund %in% c("0818", "0767", "0671", "0593", "0578"), str_replace(fund, "0","9"), fund)) %>%
  mutate(fund = ifelse(fy>1999 & fy < 2018 & fund == "0231", "10231", fund) ) %>%
  
  mutate(fund = ifelse(fy < 2019 & fund %in% c("0161", "0489", "0500", "0612", "0893", "0766"), str_replace(fund, "0","9"), fund)) %>%
  
  mutate(fund =  ifelse(fy < 2020 & fund %in% c("0254", "0304", "0324", "0610", "0887", "0908", "0939", "0968"), str_replace(fund, "0","9"), fund)) %>%
  
  mutate(fund =  ifelse(fy < 2021 & fund %in% c("0255", "0325", "0348", "0967", "0972"), str_replace(fund, "0","9"), fund))  %>%
  
  #2022 changes
  mutate(fund = ifelse(fy < 2022 & fund %in% c("0110","0165","0351", "0392", "0393", "0422", "0544", "0628", "0634",  "0656", "0672", "0683","0723", "0742", "0743"), str_replace(fund, "0","9"), as.character(fund))) %>%  # replaces first 0 it finds with a 9
  mutate(fund = ifelse(fy < 2022 & fund == "0367", "10367", as.character(fund)) # fund reused for 3rd time 
  )
funds_ab_in_2022 = readxl::read_excel("C:/Users/aleaw/OneDrive/Documents/PhD Fall 2021 - Spring 2022/Merriman RA/Fiscal Futures FY2022/Replication-Files/funds_ab_in_2022.xlsx")


exp_temp <- exp_1998_2022 %>% 
  arrange(fund, fy) %>%
  filter(expenditure != 0) %>% # keeps everything that is not zero
# join  funds_ab_in_2021  to exp_temp
 left_join(funds_ab_in_2022, by = "fund")  # matches most recent fund number 
  • the initial combined years of data are saved as dataframes named exp_1998_2022 and rev_1998_2022. These are then saved as exp_temp and rev_temp while recoding variables. This is BEFORE category groups are created and cleaned below. Only a temporary file, do not use for analysis.

Update Agencies: Early agencies replaced by successors

# recodes old agency numbers to consistent agency number
exp_temp <- exp_temp %>% 
  mutate(agency = case_when(
    (agency=="438"| agency=="475" |agency == "505") ~ "440",
    # financial institution &  professional regulation &
     # banks and real estate  --> coded as  financial and professional reg
    agency == "473" ~ "588", # nuclear safety moved into IEMA
    (agency =="531" | agency =="577") ~ "532", # coded as EPA
    (agency =="556" | agency == "538") ~ "406", # coded as agriculture
    agency == "560" ~ "592", # IL finance authority (fire trucks and agriculture stuff)to state fire marshal
    agency == "570" & fund == "0011" ~ "494",   # city of Chicago road fund to transportation
    TRUE ~ (as.character(agency)))) 

Modify Expenditure File

Tax refunds

Aggregate expenditures: Save tax refunds as negative revenue. Code refunds to match the rev_type codes (02=income taxes, 03 = corporate income taxes, 06=sales tax, 09=motor fuel tax, 24=insurance taxes and fees, 35 = all other tax refunds)

## negative revenue becomes tax refunds

tax_refund_long <- exp_temp %>% 
 # fund != "0401" # removes State Trust Funds
  filter(fund != "0401" & (object=="9910"|object=="9921"|object=="9923"|object=="9925")) %>%
  # keeps these objects which represent revenue, insurance, treasurer,and financial and professional reg tax refunds
  mutate(refund = case_when(
    fund=="0278" & sequence == "00" ~ "02",   # for income tax refund
    fund=="0278" & sequence == "01" ~ "03", # tax administration and enforcement and tax operations become corporate income tax refund
     fund == "0278" & sequence == "02" ~ "02",
    object=="9921" ~ "21", # inheritance tax and estate tax refund appropriation
    object=="9923" ~ "09", # motor fuel tax refunds
    obj_seq_type == "99250055" ~ "06", # sales tax refund
    fund=="0378" & object=="9925" ~ "24", # insurance privilege tax refund
    fund=="0001" & object=="9925" ~ "35", #all other taxes
      T ~ "CHECK")) # if none of the items above apply to the observations, then code them as CHECK 

    
exp_temp <- left_join(exp_temp, tax_refund_long) %>%
  mutate(refund = ifelse(is.na(refund),"not refund", as.character(refund)))

tax_refund <- tax_refund_long %>% 
  group_by(refund, fy)%>%
  summarize(refund_amount = sum(expenditure, na.rm = TRUE)/1000000) %>%
  pivot_wider(names_from = refund, values_from = refund_amount, names_prefix = "ref_") %>%
  mutate_all(~replace_na(.,0)) %>%
  arrange(fy)

tax_refund %>% pivot_longer( ref_02:ref_35, names_to = "Refund Type", values_to = "Amount") %>%
  ggplot()+
  geom_line(aes(x=fy,y=Amount, group = `Refund Type`, color = `Refund Type`))+
  labs(title = "Refund Types", caption = "Refunds are excluded from Expenditure totals and instead subtracted from Revenue totals")

# remove the items we recoded in tax_refund_long
exp_temp <- exp_temp %>% filter(refund == "not refund")

#should be 156 fewer observations

tax_refund will ultimately be removed from expenditure totals and instead subtracted from revenue totals (since they were tax refunds).

Pension Expenditures

300 million pension stabilization payment from fund 0319, object == 1900 for lump sums and other purposes.

State payments to the following pension systems:

• Teachers Retirement System (TRS)
- New POB bond in 2019: Accelerated Bond Fund paid benefits in advance as lump sum • State Employee Retirement System (SERS)
• State University Retirement System (SURS)
• Judges Retirement System (JRS)
• General Assembly Retirement System (GARS)

Check what is included in pensions:

  • object = 4431 catches most pension items (payments into pension fund)
    • includes 8 billion payment in 2004 that creates large peak in expenditure graphs
  • object = 1298 is purchase of investments and is excluded from analysis except for a couple exceptions during 2010 and 2011
    • Why only for those years and not other years?
  • object == 4430 is for pensions, annuities, and benefits
  • object = 1900 for pension stabilization under lump sums
# check what is being included in pensions

pension_check <- exp_temp %>% 
  mutate(pension = case_when( 
    (object=="4430") ~ "Object 4430 - OUT", # pensions, annuities, benefits
       (object=="4431") ~ "Object 4431 - IN", # 4431 = state payments into pension fund
        (obj_seq_type > "11590000" & obj_seq_type < "11660000")  ~ "Retirement Objects",

    # objects 1159 to 1166 are all considered Retirement by Comptroller 
    # object == 1167 also appears to be Other Retirement but isn't used yet
            TRUE ~ "0")) %>% 
  
  mutate(pension = case_when(
    (object=="1298" & (fund=="0477" | fund=="0479" | fund=="0481")) ~ "Purchase of Investments", # judges retirement "Purchase of investments"
  object == "1900" & fund == "0319" ~ "Pension Stabilization", # pension stabilization fund in 2022 
    TRUE ~ as.character(pension)) ) %>% 
  filter(pension != 0 )

pension_check %>% group_by(fy, pension) %>% 
  summarize(expenditure = sum(expenditure, na.rm = TRUE)) %>%
  ggplot(aes(x=fy, y = expenditure, color = pension)) + 
  geom_line() + 
  labs (title = "Pension Fund Payments In and Retirement Benefits Out", caption = "Includes items from objects 1160-1165, 1298, and 1900 as pension expenditures (retirement benefits).
        Object = 4431 includes state payments INTO pension Fund.")

pension_check2 <- exp_temp %>% 
  mutate(pension = case_when( 
     #   (object=="4431" | (object>"1159" & object<"1166") ) ~ 1, 
       (object=="4431"  ) ~ 1, # 4431 = payments into pension fund
        (obj_seq_type > "11590000" & obj_seq_type < "11660000")  ~ 2,

    # objects 1159 to 1166 are all considered Retirement by Comptroller 
    # object == 1167 also appears to be Other Retirement but isn't used yet
            TRUE ~ 0)) %>% 
  
  mutate(pension = case_when(     # objects were weird for 2010 and 2011
   (object=="4431" & fund=="0473")  ~ 3, # teachers retirement system,  
  #  obj_seq_type == "44310055" ~ 3, # teachers retirement system in 2010 and 2011. 
   (object=="1298" & 
      #(fy==2010 | fy==2011) & 
      (fund=="0477" | fund=="0479" | fund=="0481")) ~ 3, # judges retirement 
   # obj_seq_type == "12980055" ~ 3, # judge retirement contributions during 2010 and 2011
  object == "1900" & fund == "0319" ~ 5, # pension stabilization fund in 2022 
    TRUE ~ pension)) %>% 
  filter(pension > 0 )


pension_check2 
#write_csv(pension_check, "pension_checkfy22v3.csv")

## taking care of Pension Obligation Bond proceeds

pension_check2 <- pension_check2 %>% 
  # change object for 2010 and 2011, retirement expenditures were bond proceeds
  mutate(object = ifelse((pension == 3 & in_ff == "0"), "4431", as.character(object))) %>% # changes weird teacher & judge retirement system  pensions object to normal pension object 4431
  mutate(pension =  ifelse(pension == 1 & in_ff == "0", 2, pension)) %>% # coded as 2 if it was supposed to be excluded due to being bond proceeds ?
  mutate(in_ff = ifelse((pension ==2 | pension ==3), "1", as.character(in_ff)))


# create file with all pension items to find any mistakes
#pension_check %>% write_csv("all_pensions.csv")

table(pension_check2$pension)
## 
##    1    2    3    5 
##  226 8737   22    5

Modify exp_temp and move all pension contributions to their own group (901):

exp_temp <-  exp_temp %>% 
  arrange(fund) %>%
  mutate(pension = case_when( 
    
  # objects were weird for 2010 and 2011 for teacher and judge retirement system
     # (object=="4431" & fund=="0473") ~ 3, # teachers retirement system, 
 # Was this: (object=="4431" & fund=="0473" &  (fy==2010 | fy==2011)) ~ 3, # teachers retirement system, 
   (object=="4431") ~ 1, # 4431 = easy to find pension payments INTO fund
   
  (object>"1159" & object<"1166") & fund != "0183" & fund != "0193"   ~ 2, 
   # objects 1159 to 1166 are all considered Retirement by Comptroller, OUT 
  # object == 1167 also appears to be Other Retirement but isn't used yet
 
  (object=="1298" &  # Purchase of Investments
     #(fy==2010 | fy==2011) & 
     (fund=="0477" | fund=="0479" | fund=="0481")) ~ 3, #judges retirement OUT of fund
 

 fund == "0319" ~ 4, #pension stabilization fund IN?
                                        TRUE ~ 0) )

table(exp_temp$pension) # same number of total observations > 0 as pension_check
## 
##      0      1      2      3      4 
## 158990    228   8719     20      5

Description of Pension Obligation Acceleration Bond at this link

# special accounting of pension obligation bond (POB)-funded contributions to JRS, SERS, GARS, TRS 

exp_temp <- exp_temp %>% 
  # change object for 2010 and 2011, retirement expenditures were bond proceeds
  mutate(object = ifelse((pension >0 & in_ff == "0"), "4431", object)) %>% 
  # changes weird teacher & judge retirement system  pensions object to normal pension object 4431
  mutate(pension =  ifelse(pension >0 & in_ff == "0", 6, pension)) %>% # coded as 2 if it was supposed to be excluded. 
  mutate(in_ff = ifelse(pension>0, "1", in_ff))

table(exp_temp$pension) 
## 
##      0      1      2      4      6 
## 158990    226   8586      5    155
# all other pensions objects  codes get agency code 901 for State Pension Contributions
exp_temp <- exp_temp %>% 
  mutate(agency = ifelse(pension>0, "901", as.character(agency)),
         agency_name = ifelse(agency == "901", "State Pension Contributions", as.character(agency_name)))

exp_temp %>% 
 filter(pension > 0) %>%  
  mutate(pension = as.factor(pension)) %>%
  group_by(fy, pension) %>% 
  summarize(expenditure = sum(expenditure, na.rm=TRUE)) %>%
  ggplot(aes(x=fy, y=expenditure, color = pension)) +
  geom_line() + 
  labs (title = "Pension Expenditures", caption = "Includes objects 1160-1165, 1298, 1900, and 4431 as pension expenditures.
        Object = 4431 includes payments INTO pension Fund.")

Drop Interfund transfers

  • object == 1993 is for interfund cash transfers
  • agency == 799 is for statutory transfers
  • object == 1298 is for purchase of investments and is not spending EXCEPT for pensions in 2010 and 2011 (and were recoded already to object == “4431”). Over 168,000 observations remain.
transfers_drop <- exp_temp %>% filter(
  agency == "799" | # statutory transfers
           object == "1993" |  # interfund cash transfers
           object == "1298") # purchase of investments

exp_temp <- anti_join(exp_temp, transfers_drop)

State employee healthcare costs

State Employee Health Care = Sum of expenditures for “health care coverage as elected by members per state employees group insurance act.” The payments are made from the Health Insurance Reserve Fund. We subtract the share that came from employee contributions. Employee contributions are not considered a revenue source or an expenditure in our analysis.

Fund = 0457 is “Group insurance premium”, in_ff = 1
Fund = 0193 is “Local govt health insurance reserve”, in=ff = 0
fund = 0477 is “Community College Health Insurance”, in=ff = 0.
- had large amount in early years
Fund = 0907 = health insurance reserve, in_ff = 1
Fund = 9939 is “group self-insurers’ insolv”, in_ff = 1
Fund = 0940 is Self-Insurers security, in_ff = 0
Fund = 0739 is Group Workers Comp Pool Insol, in_ff = 1

Employer contributions for group insurance are excluded to avoid double counting the cost of healthcare.

All employer contributions are coded as object = 1180.

  • eehc = 0 means it is NOT a state healthcare cost but it is an employer contribution of some type to some fund
  • eehc = 1 means it is a state employee healthcare cost and it is an employer contribution to health insurance

if observation is a group insurance contribution, then the expenditure amount is set to $0 (essentially dropped from analysis)

#if observation is a group insurance contribution, then the expenditure amount is set to $0 (essentially dropped from analysis)
exp_temp <- exp_temp %>% 
  mutate(eehc = ifelse(
    # group insurance contributions for 1998-2005 and 2013-present
   fund == "0001" & (object == "1180" | object =="1900") & agency == "416" & appr_org=="20", 0, 1) )%>% 
  mutate(eehc = ifelse(
    # group insurance contributions for 2006-2012
    fund == "0001" & object == "1180" & agency == "478" & appr_org=="80", 0, eehc) )%>%
     # group insurance contributions from road fund
  # coded with 1900 for some reason??
    mutate(eehc = ifelse(
      fund == "0011" & object == "1900" & agency == "416" & appr_org=="20", 0, eehc) ) %>%
  
  mutate(expenditure = ifelse(eehc=="0", 0, expenditure)) %>%
  
  mutate(agency = case_when(   # turns specific items into State Employee Healthcare (agency=904)
      fund=="0907" & (agency=="416" & appr_org=="20") ~ "904",   # central management Bureau of benefits using health insurance reserve 
      fund=="0907" & (agency=="478" & appr_org=="80") ~ "904",   # agency = 478: healthcare & family services using health insurance reserve - stopped using this in 2012
      TRUE ~ as.character(agency))) %>%
  mutate(agency_name = ifelse(agency == "904", "STATE EMPLOYEE HEALTHCARE", as.character(agency_name)),
         in_ff = ifelse( agency == "904", 1, in_ff),
         group = ifelse(agency == "904", "904", as.character(agency)))  # creates group variable

# Default group = agency number

healthcare_costs <- exp_temp %>% filter(group == "904")

healthcare_costs %>% group_by(fy) %>% summarise(healthcare_cost = sum(expenditure)) %>% arrange(-fy)
exp_temp <- anti_join(exp_temp, healthcare_costs) %>% mutate(expenditure = ifelse(object == "1180", 0, expenditure))

healthcare_costs_yearly <- healthcare_costs %>% group_by(fy, group) %>% summarise(healthcare_cost = sum(expenditure, na.rm = TRUE)/1000000) %>% select(-group)

This code chunk above for dealing with group insurance means that healthcare costs need to be added to expenditures after other group names are assigned. Then employee contributions/insurance premiums from the revenue side need to be subtracted from the total cost of employee healthcare for the net cost.

Local Transfers

Separate transfers to local from parent agencies that come from DOR(492) or Transportation (494). Treats muni revenue transfers as expenditures, not negative revenue.

The share of certain taxes levied state-wide at a common rate and then transferred to local governments. (Purely local-option taxes levied by specific local governments with the state acting as collection agent are NOT included.)

The five corresponding revenue items are:

• Local share of Personal Income Tax
• Local share of General Sales Tax
• Personal Property Replacement Tax on Business Income
• Personal Property Replacement Tax on Public Utilities
• Local share of Motor Fuel Tax - Transportation Renewal Fund 0952

Completed: Add the mft mentioned in GOMB email to code

exp_temp <- exp_temp %>% mutate(
  agency = case_when(fund=="0515" & object=="4470" & type=="08" ~ "971", # income tax 
                     fund=="0515" & object=="4491" & type=="08" & sequence=="00" ~ "971", 
                     fund=="0802" & object=="4491" ~ "972", #pprt transfer
                     fund=="0515" & object=="4491" & type=="08" & sequence=="01" ~ "976", #gst to local
                     fund=="0627" & object=="4472"~ "976" ,
                     fund=="0648" & object=="4472" ~ "976",
                     fund=="0515" & object=="4470" & type=="00" ~ "976",
                    object=="4491" & (fund=="0188"|fund=="0189") ~ "976",
                     fund=="0187" & object=="4470" ~ "976",
                     fund=="0186" & object=="4470" ~ "976",
                    object=="4491" & (fund=="0413"|fund=="0414"|fund=="0415")  ~ "975", #mft to local
                  fund == "0952"~ "975", # Added Sept 29 2022 AWM. Transportation Renewal MFT
                    TRUE ~ as.character(agency)),
  
  agency_name = case_when(agency == "971"~ "INCOME TAX 1/10 TO LOCAL",
                          agency == "972" ~ "PPRT TRANSFER TO LOCAL",
                          agency == "976" ~ "GST TO LOCAL",
                          agency == "975" ~ "MFT TO LOCAL",
                          TRUE~as.character(agency_name)),
  group = ifelse(agency>"970" & agency < "977", as.character(agency), as.character(group)))


table(exp_temp$group)
## 
##   101   102   103   105   107   108   109   110   112   115   120   131   140 
##   583     3   240   155    89   193   137   129   162   128    17   386     7 
##   155   156   167   201   210   275   280   285   290   295   310   330   340 
##    75   117   118  1345    15   399     1   234   470  1185   213   205   819 
##   350   360   370   402   406   416   418   420   422   425   426   427   440 
##  4098  1738   803  1829  4660  3932  2420 10975  9668  1038  7614   779  3705 
##   442   444   445   446   448   452   458   466   478   482   492   493   494 
##   596 11357    23  1119    22   610   305   587  3063  5524  4129  1924  9550 
##   497   503   506   507   509   510   511   517   520   524   525   526   527 
##  2519   421    17   332    33    26  8954   128     5  1126    28   174    40 
##   528   529   532   533   534   537   540   541   542   546   548   554   555 
##  1838    18  5746     2     5   192    64  1305   174   873   264    26    25 
##   557   558   559   562   563   564   565   567   568   569   571   574   575 
##   208   280   245    19   699    17   198   176     2   450    65    80    85 
##   576   578   579   580   583   585   586   587   588   589   590   591   592 
##     1   233   438   327    21    43  5297   683  2681   597   166   188  1070 
##   593   598   601   608   612   616   620   628   636   644   664   676   684 
##   151    10   720   177   131   141    99   147   115   182   271   462   895 
##   691   692   693   695   901   971   972   975   976 
##   934   786     8   197  8972    25    25    84  1174
transfers_long <- exp_temp %>% 
  filter(group == "971" |group == "972" | group == "975" | group == "976")

transfers <- transfers_long %>%
  group_by(fy, group ) %>%
  summarize(sum_expenditure = sum(expenditure)/1000000) %>%
  pivot_wider(names_from = "group", values_from = "sum_expenditure", names_prefix = "exp_" )

exp_temp <- anti_join(exp_temp, transfers_long)


dropped_inff_0 <- exp_temp %>% filter(in_ff == 0)

exp_temp <- exp_temp %>% filter(in_ff == 1) # drops in_ff = 0 funds AFTER dealing with net-revenue above

Debt Service

Debt Service expenditures include interest payment on both short-term and long-term debt. We do not include escrow or principal payments.

Decision from Sept 30 2022:

We are no longer including short term principal payments as a cost; only interest on short term borrowing is a cost. Pre FY22 and the FY21 correction, we did include an escrow payment and principle payments as costs but not bond proceeds as revenues. This caused expenditures to be inflated because we were essentially counting debt twice - the principle payment and whatever the money was spent on in other expenditure categories, which was incorrect.

  • Include interest for long-term debt that likely funds capital projects.

8813 interest INCLUDE AS COST
8811 is for principle EXCLUDE from analysis
8841 is for escrow payments EXCLUDE from analysis
8800 is for capital projects (including the Tollway) INCLUDE as cost - Note: debt principle and interest are both included because they are combined in the data observations; bond proceeds are not considered a revenue source

  • Exclude: Bond principle payments: obj_seq_type == 88110008
    • Exclude based on Merriman’s meeting. Run it both ways to compare output to make sure we made the correct decision.
  • Exclude: Short term borrowing principle: obj_seq_type == 88110108
    • except for 2021 and 2022 which have GO Bond principle under this code????
    • based on the numbers, I think it is still short term principle with wrong appropriation name
  • Include: General Obligation Bond Interest: obj_seq_type == 88130000 & 88130008
  • Include: Interest for short-term borrowing: 88130108
  • Exclude: Escrow payment == 88410008
  • Include: Build IL Bonds principal AND interest
    • Tollway is obj_seq_type == 88000055, filter out fund == 0455 to avoid tollway debt
    • fund == 0455 is the IL State Toll Highway fund, items mostly for operations and maintenance

Filtering for interest on short term borrowing and GO bonds (8813_ _ _ _) and GO bond principal amounts (88130008).
- object == 8813 is for interest but obj_seq_type is used just to be more specific below.

# GO bond principal and GO bond interest
GObond_debt <- exp_temp %>% 
  filter(obj_seq_type == "88110008" |obj_seq_type == "88130000" | obj_seq_type == "88130008") %>% 
  group_by(fy, obj_seq_type) %>% 
  summarize(sum = sum(expenditure, na.rm=TRUE)) %>% 
  pivot_wider(names_from = obj_seq_type, values_from = sum) %>% 
  mutate(principal = `88110008`,
         interest = sum(`88130008`+`88130000`, na.rm = TRUE),
         ratio = (as.numeric(interest)/as.numeric(principal)))

GObond_debt %>% select(principal, interest, ratio) %>%
  mutate(across(principal:interest, ~format(., big.mark= ",", scientific = F)))
GObond_debt %>% ggplot() + 
  geom_line(aes(x=fy, y=principal, color = "Principal"))+ 
  geom_line(aes(x=fy, y=interest, color = "Interest"))

# short term borrowing, first observation is in 2004?
short_debt <- exp_temp %>% 
  filter(obj_seq_type == 88110108 |obj_seq_type == 88130108) %>% 
  group_by(fy, obj_seq_type) %>% 
  summarize(sum = sum(expenditure, na.rm=TRUE)) %>% 
  pivot_wider(names_from = obj_seq_type, values_from = sum) %>% 
  mutate(principal = `88110108`,
         interest = `88130108`,
         ratio = (as.numeric(interest)/as.numeric(principal)))

short_debt %>% select(principal, interest, ratio) %>%
  mutate(across(principal:interest, ~format(., big.mark= ",",  scientific = F)))
short_debt %>% ggplot() + 
  geom_col(aes(x=fy, y=principal/1000000, fill = "Principal"))+ 
  geom_col(aes(x=fy, y=interest/1000000, fill = "Interest")) + 
  labs(title = "Short Term Borrowing: Principal and Interest Payments")

capitalprojects <- exp_temp %>%filter(object == "8800")


all_debt <- exp_temp %>% 
  filter(fund != "0455" & (object == "8811" |object == "8813" | object == "8800") )%>% 
  group_by(fy, object) %>% 
  summarize(sum = sum(expenditure, na.rm=TRUE)) %>% 
  pivot_wider(names_from = object, values_from = sum) %>% 
  mutate(principal = `8811`,
         interest = `8813`,
         BuildIL = `8800`,
         ratio = (as.numeric(interest)/as.numeric(principal)))

all_debt %>% select(principal, interest, BuildIL, ratio) %>%
  mutate(across(principal:BuildIL, ~format(., big.mark= ",", scientific = F)))
all_debt %>% ggplot() + 
  geom_line(aes(x=fy, y=principal/1000000, color = "Principal"))+ 
  geom_line(aes(x=fy, y=interest/1000000, color = "Interest"))+
  geom_line(aes(x=fy, y = BuildIL / 1000000, color = "Build IL Bonds"))+
  labs(y = "Debt ($Millions)", title = "Short term borrowing and GO Bonds",
       subtitle = "Principal and Interest payments")

Capital projects include the IL Civic Center and Build Illinois Bonds. Tollway principal and interest has been dropped from Debt Service but is counted in Tollway Expenditure Cost.

debt_drop <- exp_temp %>% 
  filter(object == "8841" |  object == "8811")  
# escrow  OR  principle

#debt_drop %>% group_by(fy) %>% summarize(sum = sum(expenditure)) %>% arrange(-fy)


debt_keep <- exp_temp %>% 
  filter(fund != "0455" & (object == "8813" | object == "8800" )) 
# examine the debt costs we want to include

#debt_keep %>% group_by(fy) %>% summarize(sum = sum(expenditure)) %>% arrange(-fy) 


exp_temp <- anti_join(exp_temp, debt_drop) 
exp_temp <- anti_join(exp_temp, debt_keep)

debt_keep <- debt_keep %>%
  mutate(
    agency = ifelse(fund != "0455" & (object == "8813" | object == "8800"), "903", as.character(agency)),
    group = ifelse(fund != "0455" & (object == "8813" | object == "8800"), "903", as.character(group)),
    in_ff = ifelse(group == "903", 1, as.character(in_ff)))

debt_keep_yearly <- debt_keep %>% group_by(fy, group) %>% summarize(debt_cost = sum(expenditure,na.rm=TRUE)/1000000) %>% select(-group)

Medicaid

Medicaid. That portion of the Healthcare and Family Services (or Public Aid in earlier years, agency code 478) budget for Medical (appr_organization code 65) for awards and grants (object codes 4400 and 4900).

State CURE will remain in the Medicaid expenditure category due to the nature of it being federal funds providing public health services and funding to locations that provide public services.

  • Uses same appropriation name of “HEALTHCARE PROVIDER RELIEF” and fund == 0793 and obj_seq_type == 49000000. So can defend the “mistake” of including healthcare provider relief as Medicaid expenditure.

Add Fiscal Future group codes

exp_temp <- exp_temp %>%
  #mutate(agency = as.numeric(agency) ) %>%
  # arrange(agency)%>%
  mutate(
    group = case_when(
      agency>"100"& agency<"200" ~ "910", # legislative
      
      agency == "528"  | (agency>"200" & agency<"300") ~ "920", # judicial
      pension>0  ~ "901", # pensions
      (agency>"309" & agency<"400") ~ "930",    # elected officers
      
      agency == "586" ~ "959", # create new K-12 group

      agency=="402" | agency=="418" | agency=="478" | agency=="444" | agency=="482" ~ as.character(agency), # aging, CFS, HFS, human services, public health
      T ~ as.character(group))
    ) %>%      

  
  mutate(group = case_when(
    agency=="478" & (appr_org=="01" | appr_org == "65" | appr_org=="88") & (object=="4900" | object=="4400") ~ "945", # separates CHIP from health and human services and saves it as Medicaid
    
    agency == "586" & fund == "0355" ~ "945",  # 586 (Board of Edu) has special education which is part of medicaid
    
    # OLD CODE: agency == "586" & appr_org == "18" ~ "945", # Spec. Edu Medicaid Matching
    
    agency=="425" | agency=="466" | agency=="546" | agency=="569" | agency=="578" | agency=="583" | agency=="591" | agency=="592" | agency=="493" | agency=="588" ~ "941", # public safety & Corrections
    
    agency=="420" | agency=="494" |  agency=="406" | agency=="557" ~ as.character(agency), # econ devt & infra, tollway
    
    agency=="511" | agency=="554" | agency=="574" | agency=="598" ~ "946",  # Capital improvement
    
    agency=="422" | agency=="532" ~ as.character(agency), # environment & nat. resources
    
    agency=="440" | agency=="446" | agency=="524" | agency=="563"  ~ "944", # business regulation
    
    agency=="492" ~ "492", # revenue
    
    agency == "416" ~ "416", # central management services
    agency=="448" & fy > 2016 ~ "416", #add DoIT to central management 
    
    T ~ as.character(group))) %>%
  
  
  mutate(group = case_when(
    agency=="684" | agency=="691"  ~ as.character(agency),
    
    agency=="692" | agency=="695" | (agency>"599" & agency<"677") ~ "960", # higher education
    
    agency=="427"  ~ as.character(agency), # employment security
    
    agency=="507"|  agency=="442" | agency=="445" | agency=="452" |agency=="458" | agency=="497" ~ "948", # other departments
    
    # other boards & Commissions
    agency=="503" | agency=="509" | agency=="510" | agency=="565" |agency=="517" | agency=="525" | agency=="526" | agency=="529" | agency=="537" | agency=="541" | agency=="542" | agency=="548" |  agency=="555" | agency=="558" | agency=="559" | agency=="562" | agency=="564" | agency=="568" | agency=="579" | agency=="580" | agency=="587" | agency=="590" | agency=="527" | agency=="585" | agency=="567" | agency=="571" | agency=="575" | agency=="540" | agency=="576" | agency=="564" | agency=="534" | agency=="520" | agency=="506" | agency == "533" ~ "949", 
    
    # non-pension expenditures of retirement funds moved to "Other Departments"
    # should have removed pension expenditures already from exp_temp in Pensions step above
    agency=="131" | agency=="275" | agency=="589" |agency=="593"|agency=="594"|agency=="693" ~ "948",
    
    T ~ as.character(group))) %>%

  mutate(group_name = 
           case_when(
             group == "416" ~ "Central Management",
             group == "478" ~ "Healthcare and Family Services",
             group == "482" ~ "Public Health",
             group == "900" ~ "NOT IN FRAME",
             group == "901" ~ "STATE PENSION CONTRIBUTION",
             group == "903" ~ "DEBT SERVICE",
             group == "910" ~ "LEGISLATIVE"  ,
             group == "920" ~ "JUDICIAL" ,
             group == "930" ~ "ELECTED OFFICERS" , 
             group == "940" ~ "OTHER HEALTH-RELATED", 
             group == "941" ~ "PUBLIC SAFETY" ,
             group == "942" ~ "ECON DEVT & INFRASTRUCTURE" ,
             group == "943" ~ "CENTRAL SERVICES",
             group == "944" ~ "BUS & PROFESSION REGULATION" ,
             group == "945" ~ "MEDICAID" ,
             group == "946" ~ "CAPITAL IMPROVEMENT" , 
             group == "948" ~ "OTHER DEPARTMENTS" ,
             group == "949" ~ "OTHER BOARDS & COMMISSIONS" ,
             group == "959" ~ "K-12 EDUCATION" ,
             group == "960" ~ "UNIVERSITY EDUCATION" ,
             group == agency ~ as.character(group),
             TRUE ~ "Check name"),
         year = fy)

exp_temp %>% filter(group_name == "Check name")
#write_csv(exp_temp, "all_expenditures_recoded.csv")

All expenditures recoded but not aggregated: Allows for inspection of individual expenditures within larger categories. This stage of the data is extremely useful for investigating almost all questions we have about the data.

Note that these are the raw figures BEFORE we take the additional steps:

  • Subtract employee insurance premiums from State Employee Healthcare expenditures

  • Subtract tax refunds from tax revenues by revenue type.

  • Subtract employee pension contributions (originally a dropped revenue) from State Pension expenditures

  • NOT DOING ANYMORE: Add employee health costs and certain pension contributions to All Other Revenues

Modify Revenue data

Revenue Categories NOT included in Fiscal Futures:
- 32. Garnishment-Levies. (State is fiduciary, not beneficiary.)
- 45. Student Fees-Universities. (Excluded from state-level budget.)
- 51. Retirement Contributions (of individuals and non-state entities).
- 66. Proceeds, Investment Maturities. (Not sustainable flow.)
- 72. Bond Issue Proceeds. (Not sustainable flow.)
- 75. Inter-Agency Receipts.
- 79. Cook County Intergovernmental Transfers. (State is not beneficiary.)
- 98. Prior Year Refunds.

All Other Sources

Expanded to include the following smaller sources:
- 30. Horse Racing Taxes & Fees.
- 60. Other Grants and Contracts.
- 63. Investment Income.

For aggregating revenue, use the rev_1998_2022 dataframe, join the funds_ab_in_2022 file to it, and then join the ioc_source_type file to the dataset.

You need to update the funds_ab_in and ioc_source_type file every year!

include how to do that later

# fund info to revenue for all years
rev_temp <- inner_join(rev_1998_2022, funds_ab_in_2022, by = "fund") %>% arrange(source)

# need to update the ioc_source_type file every year! 
ioc_source_type <- readxl::read_xlsx("C:/Users/aleaw/OneDrive/Documents/PhD Fall 2021 - Spring 2022/Merriman RA/Fiscal Futures FY2022/Replication-Files/ioc_source_updated22_AWM.xlsx")

rev_temp <- left_join(rev_temp, ioc_source_type, by = "source")
# automatically used source, source name does not match for the join to work using source_name

Update Agencies: Early agencies replaced by successors

# recodes old agency numbers to consistent agency number
rev_temp <- rev_temp %>% 
  mutate(agency = case_when(
    (agency=="438"| agency=="475" |agency == "505") ~ "440",
    # financial institution &  professional regulation &
     # banks and real estate  --> coded as  financial and professional reg
    agency == "473" ~ "588", # nuclear safety moved into IEMA
    (agency =="531" | agency =="577") ~ "532", # coded as EPA
    (agency =="556" | agency == "538") ~ "406", # coded as agriculture
    agency == "560" ~ "592", # IL finance authority (fire trucks and agriculture stuff)to state fire marshal
    agency == "570" & fund == "0011" ~ "494",   # city of Chicago road fund to transportation
    TRUE ~ (as.character(agency)))) 

Federal to State Transfers

rev_temp <- rev_temp %>% 
  mutate(
    rev_type = ifelse(rev_type=="57" & agency=="478" & (source=="0618"|source=="2364"|source=="0660"|source=="1552"| source=="2306"| source=="2076"|source=="0676"|source=="0692"), "58", rev_type),
    rev_type_name = ifelse(rev_type=="58", "FEDERAL TRANSPORTATION", rev_type_name),
    rev_type = ifelse(rev_type=="57" & agency=="494", "59", rev_type),
    rev_type_name = ifelse(rev_type=="59", "FEDERAL TRANSPORTATION", rev_type_name),
    rev_type_name = ifelse(rev_type=="57", "FEDERAL OTHER", rev_type_name),
    rev_type = ifelse(rev_type=="6", "06", rev_type),
    rev_type = ifelse(rev_type=="9", "09", rev_type)) 


rev_temp %>% 
  group_by(fy, rev_type_name) %>% 
  summarise(receipts = sum(receipts, na.rm = TRUE)/1000000) %>% 
  pivot_wider(names_from = rev_type_name, values_from = receipts)

Pension Contributions

Employee contributions to pension are a revenue source for the state. In order to get the net cost of pensions for the state, employee contributions should be subtracted in order to calculate net costs.

  • current year employee revenue source = 0573
  • contributions by employee == 572 (stops at 2011)
#pension_rev_check <- rev_temp %>% filter(source == "0572" | source == "0573" | source == "0574" | source == "0577" | source == "1982" | source == "2567")

#write_csv(pension_rev_check, "pension_revenue_check.csv")


# current year employee revenue source = 0573, contributions by employee == 572 (stops at 2011)
pension_rev <- rev_temp %>% 
  filter(rev_type == "51" & source == "0573" | source == "0572")


rev_type <- anti_join(rev_temp, pension_rev)

pension_rev_yearly <- rev_temp %>% 
  filter(rev_type == "51" & source == "0573" | source == "0572") %>% 
  group_by(fy, rev_type) %>%
  summarise(pension_rev_sum = sum(receipts, na.rm=TRUE)/1000000) %>% select(-rev_type)

pension_rev should be subtracted from the state pension expenditures. Employee contributions to pensions are a revenue source. We want net pension cost, therefore subtract employee contributions from pension costs.

Health Insurance Premiums from Employees

Employee insurance premiums for healthcare are a revenue source for the state. In order to get the true cost of State Employee Healthcare, total employee insurance premiums should be subtracted from the healthcare expenditure total (exp_904 - premiums). Gather all employee premiums into opt_premiums and subtract it from the Expenditure table after aggregating and pivoting steps.

0120 = ins prem-option life 0120 = ins prem-optional life/univ

0347 = optional health - HMO 0348 = optional health - dental 0349 = optional health - univ/local SI 0350 = optional health - univ/local 0351 = optional health - retirement 0352 = optional health - retirement SI 0353 = optional health - retire/dental 0354 = optional health - retirement hmo

2199-2209 = various HMOs, dental, health plans from Health Insurance Reserve (fund)

opt_premiums_CHECK <- rev_temp %>% 
  filter((fund=="0907" | fund == "0457") & (source=="0120" | source=="0121" | (source>"0345" & source<"0357") | (source>"2199" & source<"2209")                                            ) 
         )

#collect optional insurance premiums to fund 0907 for use in eehc expenditure  
rev_temp <- rev_temp %>% 
  mutate(med_option_recent = ifelse(
    fund=="0907" & (source=="0120"| source=="0121"| (source>"0345" & source<"0357")|(source>"2199" & source<"2209")), 1, 0),
    
    # adds more rev_type codes
    rev_type = case_when(
      fund =="0427" ~ "12", # pub utility tax
      fund == "0742" | fund == "0473" ~ "24", # insurance and fees
      fund == "0976" ~ "36",# receipts from rev producing
      fund == "0392" |fund == "0723" ~ "39", # licenses and fees
      fund == "0656" ~ "78", #all other rev sources
      TRUE ~ as.character(rev_type)))
# if not mentioned, then rev_type as it was



# optional insurance premiums = employee insurance premiums
med_option_recent <- rev_temp %>%
  group_by(fy, med_option_recent) %>%
  summarize(med_option_amt_recent = sum(receipts)/1000000) %>%
  filter(med_option_recent == 1) %>%
  rename(year = fy) %>% 
  select(-med_option_recent)

med_option_long <- rev_temp %>%  filter(med_option_recent == 1)
# 361 observations have med_option_recent == 1


rev_temp <- rev_temp %>% filter(med_option_recent != 1)
# could also do an antijoin with rev_temp and med_option dataframes

You must then subtract the employee premiums from the state healthcare expenditures totals. Employee premiums on the revenue side should have receipts coded as $0 or be dropped using anti_join().

Transfers in and Out:

I don’t have much faith in the transfers in and out steps- AWM

I am currently choosing to exclude the totals from this step. Overall, this decreases the total revenues in “All Other Revenues” by a few million dollars.

  • in_from_out <- c(“0847”, “0867”, “1175”, “1176”, “1177”, “1178”, “1181”, “1182”, “1582”, “1592”, “1745”, “1982”, “2174”, “2264”)
  • See the methodology document for the list of what these sources/funds are.
rev_temp <- rev_temp %>% 
  filter(in_ff == 1) %>% 
  mutate(local = ifelse(is.na(local), 0, local)) %>% # drops all revenue observations that were coded as "local == 1"
  filter(local != 1)

# 1175 doesnt exist?
in_from_out <- c("0847", "0867", "1175", "1176", "1177", "1178", "1181", "1182", "1582", "1592", "1745", "1982", "2174", "2264")

# what does this actually include:
# all are items with rev_type = 75 originally. 
in_out_df <- rev_type %>%
  mutate(infromout = ifelse(source %in% in_from_out, 1, 0)) %>%
  filter(infromout == 1)

rev_temp <- rev_temp %>% 
  mutate(rev_type_new = ifelse(source %in% in_from_out, "76", rev_type))
# if source contains any of the codes in in_from_out, code them as 76 (all other rev).
# revenue types to drop
drop_type <- c("32", "45", "51", 
               "66", "72", "75", "79", "98")

# drops Blank, Student Fees, Retirement contributions, proceeds/investments,
# bond issue proceeds, interagency receipts, cook IGT, Prior year refunds.


rev_temp <- rev_temp %>% filter(!rev_type_new %in% drop_type)
# keep observations that do not have a revenue type mentioned in drop_type

table(rev_temp$rev_type_new)
## 
##    02    03    06    09    12    15    18    21    24    27    30    31    33 
##   161   124   828   127   575   258    45  1420   450    76   659   124   130 
##    35    36    39    42    48    54    57    58    59    60    63    76    78 
##   660  5152  9044  2755    31  1239  6451   620   226   103  5081   154 10880 
##    99 
##   964
rev_temp %>% 
  group_by(fy, rev_type_new) %>% 
  summarize(total_reciepts = sum(receipts)/1000000) %>%
  pivot_wider(names_from = rev_type_new, values_from = total_reciepts, names_prefix = "rev_") 
# combines smallest 4  categories to to "Other"
# they were the 4 smallest in past years, are they still the 4 smallest? 

rev_temp <- rev_temp %>%  
  mutate(rev_type_new = ifelse(rev_type=="30" | rev_type=="60" | rev_type=="63" | rev_type=="76", "78", rev_type_new))


#table(rev_temp$rev_type_new)  # check work


rm(rev_1998_2022)
rm(exp_1998_2022)

Pivoting and Merging

  • State employee contributions (eehc from eehc2_amt) should be subtracted from state employee healthcare expenditures. State employer contributions should be dropped to avoid double counting costs.

  • Subtract employee insurance premiums from 904 (State Employee Healthcare Expenditures - Employee Premiums = Actual state healthcare costs. Subtract med_option_amt_recent in med_option_recent from exp_904 in ff_exp).

  • Not doing anymore: State pension contributions funded by bonds (pension_amt from pension2_fy22) should be added to Other revenues.

    • Maybe right??: Employee contributions should be subtracted from from state pension costs.
    • exp_901 - rev_51? even though rev_type == 51 (retirement contributions) is dropped based on past code.
  • Local Government Transfers (exp_970) should be on the expenditure side

Revenues

I chose to drop rev_76 for Transfers in and Out because I do not understand why that step occurs. If I keep rev_76 in and include it in rev_78 for All Other Revenues, then the difference between R and Stata code should be resolved.
- after Stata code is edited to drop employee insurance premium revenue from all other revenues. Currently it keeps it in AND subtracts it from state healthcare expenditures.

ff_rev <- rev_temp %>% 
  group_by(rev_type_new, fy) %>% 
  summarize(sum_receipts = sum(receipts, na.rm=TRUE)/1000000 ) %>%
  pivot_wider(names_from = "rev_type_new", values_from = "sum_receipts", names_prefix = "rev_")

ff_rev<- left_join(ff_rev, tax_refund)

#ff_rev <- left_join(ff_rev, pension2_fy22, by=c("fy" = "year"))

#ff_rev <- left_join(ff_rev, eehc2_amt) 
ff_rev <- mutate_all(ff_rev, ~replace_na(.,0))


ff_rev <- ff_rev %>%
  mutate(rev_02 = rev_02 - ref_02,
         rev_03 = rev_03 - ref_03,
         rev_06 = rev_06 - ref_06,
         rev_09 = rev_09 - ref_09,
         rev_21 = rev_21 - ref_21,
         rev_24 = rev_24 - ref_24,
         rev_35 = rev_35 - ref_35

      #   rev_78new = rev_78 #+ pension_amt #+ eehc
         ) %>% 
  select(-c(ref_02:ref_35, rev_99, rev_NA, rev_76#, pension_amt , rev_76,
          #  , eehc
            ))

ff_rev

Since I already pivot_wider()ed the table in the previous code chunk, I now change each column’s name by using rename() to set new variable names. Ideally the final dataframe would have both the variable name and the variable label but I have not done that yet.

aggregate_rev_labels <- ff_rev %>%
  rename("INDIVIDUAL INCOME TAXES, gross of local, net of refunds" = rev_02,
         "CORPORATE INCOME TAXES, gross of PPRT, net of refunds" = rev_03,
         "SALES TAXES, gross of local share" = rev_06 ,
         "MOTOR FUEL TAX, gross of local share, net of refunds" = rev_09 ,
         "PUBLIC UTILITY TAXES, gross of PPRT" = rev_12,
         "CIGARETTE TAXES" = rev_15 ,
         "LIQUOR GALLONAGE TAXES" = rev_18,
         "INHERITANCE TAX" = rev_21,
         "INSURANCE TAXES&FEES&LICENSES, net of refunds" = rev_24 ,
         "CORP FRANCHISE TAXES & FEES" = rev_27,
      #   "HORSE RACING TAXES & FEES" = rev_30,  # in Other
         "MEDICAL PROVIDER ASSESSMENTS" = rev_31 ,
         # "GARNISHMENT-LEVIES " = rev_32 , # dropped
         "LOTTERY RECEIPTS" = rev_33 ,
         "OTHER TAXES" = rev_35,
         "RECEIPTS FROM REVENUE PRODUCNG" = rev_36, 
         "LICENSES, FEES & REGISTRATIONS" = rev_39 ,
         "MOTOR VEHICLE AND OPERATORS" = rev_42 ,
         #  "STUDENT FEES-UNIVERSITIES" = rev_45,   # dropped
         "RIVERBOAT WAGERING TAXES" = rev_48 ,
         # "RETIREMENT CONTRIBUTIONS " = rev_51, # dropped
         "GIFTS AND BEQUESTS" = rev_54, 
         "FEDERAL OTHER" = rev_57 ,
         "FEDERAL MEDICAID" = rev_58, 
         "FEDERAL TRANSPORTATION" = rev_59 ,
      #   "OTHER GRANTS AND CONTRACTS" = rev_60, #other
       #  "INVESTMENT INCOME" = rev_63, # other
         # "PROCEEDS,INVESTMENT MATURITIES" = rev_66 , #dropped
         # "BOND ISSUE PROCEEDS" = rev_72,  #dropped
         # "INTER-AGENCY RECEIPTS" = rev_75,  #dropped
     #    "TRANSFER IN FROM OUT FUNDS" = rev_76,  #other
         "ALL OTHER SOURCES" = rev_78,
         # "COOK COUNTY IGT" = rev_79, #dropped
         # "PRIOR YEAR REFUNDS" = rev_98 #dropped
  ) 

aggregate_rev_labels
# Still contains columns that should be dropped for the clean final aggregate table. Drop the variables I don't want in the output table in the "graphs" section.  

Expenditures

Create state employee healthcare costs that reflects the health costs minus the optional insurance premiums that came in (904_new = 904 - med_option_amt_recent).

Create exp_970 for all local government transfers (exp_971 + exp_972 + exp_975 + exp_976).

ff_exp <- exp_temp %>% 
  group_by(fy, group) %>% 
  summarize(sum_expenditures = sum(expenditure, na.rm=TRUE)/1000000 ) %>%
  pivot_wider(names_from = "group", values_from = "sum_expenditures", names_prefix = "exp_")%>%
  
    left_join(debt_keep_yearly) %>%
  mutate(exp_903 = debt_cost) %>%

    left_join(healthcare_costs_yearly) %>%

  # join state employee healthcare and subtract employee premiums
  left_join(med_option_recent, by = c("fy" = "year")) %>%
  mutate(exp_904_new = (`healthcare_cost` - `med_option_amt_recent`)) %>% # state employee healthcare premiums
  
  left_join(pension_rev_yearly) %>%
      mutate(exp_901_new = exp_901 - pension_rev_sum) %>% #employee pension contributions


  # join local transfers and create exp_970
  left_join(transfers) %>%
  mutate(exp_970 = exp_971 + exp_972  + exp_975 + exp_976)

ff_exp<- ff_exp %>% select(-c(exp_901, med_option_amt_recent, debt_cost, healthcare_cost, pension_rev_sum, exp_971:exp_976)) # drop unwanted columns
ff_exp

Clean Table Outputs

Create total revenues and total expenditures only:

  • after aggregating expenditures and revenues, pivoting wider, and left_joining the additional mini dataframes (med_option_recent, pension2_fy22, eehc2_amt), then I want to drop the columns that I no longer want and then pivot_longer(). After pivoting_longer() and creating rev_long and exp_long, expenditures and revenues are in the same format and can be combined together for the totals and gap each year.
rev_long <- pivot_longer(ff_rev, rev_02:rev_78, names_to = c("type","Category"), values_to = "Dollars", names_sep = "_") %>% 
  rename(Year = fy) %>%
  mutate(Category_name = case_when(
    Category == "02" ~ "INDIVIDUAL INCOME TAXES, gross of local, net of refunds" ,
    Category == "03" ~ "CORPORATE INCOME TAXES, gross of PPRT, net of refunds" ,
    Category == "06" ~ "SALES TAXES, gross of local share" ,
    Category == "09" ~ "MOTOR FUEL TAX, gross of local share, net of refunds" ,
    Category == "12" ~ "PUBLIC UTILITY TAXES, gross of PPRT" ,
    Category == "15" ~ "CIGARETTE TAXES" ,
    Category == "18" ~ "LIQUOR GALLONAGE TAXES" ,
    Category == "21" ~ "INHERITANCE TAX" ,
    Category == "24" ~ "INSURANCE TAXES&FEES&LICENSES, net of refunds " ,
    Category == "27" ~ "CORP FRANCHISE TAXES & FEES" ,
    Category == "30" ~ "HORSE RACING TAXES & FEES",  # in Other
    Category == "31" ~ "MEDICAL PROVIDER ASSESSMENTS" ,
    Category == "32" ~ "GARNISHMENT-LEVIES" , # dropped
    Category == "33" ~  "LOTTERY RECEIPTS" ,
    Category == "35" ~  "OTHER TAXES" ,
    Category == "36" ~  "RECEIPTS FROM REVENUE PRODUCNG", 
    Category == "39" ~  "LICENSES, FEES & REGISTRATIONS" ,
    Category == "42" ~  "MOTOR VEHICLE AND OPERATORS" ,
    Category == "45" ~  "STUDENT FEES-UNIVERSITIES",   # dropped
    Category == "48" ~  "RIVERBOAT WAGERING TAXES" ,
    Category == "51" ~  "RETIREMENT CONTRIBUTIONS" , # dropped
    Category == "54" ~ "GIFTS AND BEQUESTS", 
    Category == "57" ~  "FEDERAL OTHER" ,
    Category == "58" ~  "FEDERAL MEDICAID", 
    Category == "59" ~  "FEDERAL TRANSPORTATION" ,
    Category == "60" ~  "OTHER GRANTS AND CONTRACTS", #other
    Category == "63" ~  "INVESTMENT INCOME", # other
    Category == "66" ~ "PROCEEDS,INVESTMENT MATURITIES" , #dropped
    Category == "72" ~ "BOND ISSUE PROCEEDS",  #dropped
    Category == "75" ~  "INTER-AGENCY RECEIPTS ",  #dropped
    Category == "76" ~  "TRANSFER IN FROM OUT FUNDS",  #other
    Category == "78" ~  "ALL OTHER SOURCES" ,
    Category == "79" ~   "COOK COUNTY IGT", #dropped
    Category == "98" ~  "PRIOR YEAR REFUNDS", #dropped
                 T ~ "Check Me!"

  ) )


exp_long <- pivot_longer(ff_exp, exp_402:exp_970 , names_to = c("type", "Category"), values_to = "Dollars", names_sep = "_") %>% 
  rename(Year = fy ) %>% 
  mutate(Category_name = 
           case_when(
             Category == "402" ~ "AGING" ,
             Category == "406" ~ "AGRICULTURE", 
             Category == "416" ~ "CENTRAL MANAGEMENT",
             Category == "418" ~ "CHILDREN AND FAMILY SERVICES", 
             Category == "420" ~ "COMMERCE AND ECONOMIC OPPORTUNITY",
             Category == "422" ~ "NATURAL RESOURCES" ,
             Category == "426" ~ "CORRECTIONS",
             Category == "427" ~ "EMPLOYMENT SECURITY" ,
             Category == "444" ~ "HUMAN SERVICES" ,
             Category == "448" ~ "Innovation and Technology", # AWM added fy2022
             Category == "478" ~ "HEALTHCARE & FAM SER NET OF MEDICAID", 
             Category == "482" ~ "PUBLIC HEALTH", 
             Category == "492" ~ "REVENUE", 
             Category == "494" ~ "TRANSPORTATION" ,
             Category == "532" ~ "ENVIRONMENTAL PROTECT AGENCY" ,
             Category == "557" ~ "IL STATE TOLL HIGHWAY AUTH" ,
             Category == "684" ~ "IL COMMUNITY COLLEGE BOARD", 
             Category == "691" ~ "IL STUDENT ASSISTANCE COMM" ,
             Category == "900" ~ "NOT IN FRAME",
             Category == "901" ~ "STATE PENSION CONTRIBUTION",
             Category == "903" ~ "DEBT SERVICE",
             Category == "904" ~ "State Employee Healthcare",
             Category == "910" ~ "LEGISLATIVE"  ,
             Category == "920" ~ "JUDICIAL" ,
             Category == "930" ~ "ELECTED OFFICERS" , 
             Category == "940" ~ "OTHER HEALTH-RELATED", 
             Category == "941" ~ "PUBLIC SAFETY" ,
             Category == "942" ~ "ECON DEVT & INFRASTRUCTURE" ,
             Category == "943" ~ "CENTRAL SERVICES",
             Category == "944" ~ "BUS & PROFESSION REGULATION" ,
             Category == "945" ~ "MEDICAID" ,
             Category == "946" ~ "CAPITAL IMPROVEMENT" , 
             Category == "948" ~ "OTHER DEPARTMENTS" ,
             Category == "949" ~ "OTHER BOARDS & COMMISSIONS" ,
             Category == "959" ~ "K-12 EDUCATION" ,
             Category == "960" ~ "UNIVERSITY EDUCATION",
             Category == "970" ~ "Local Govt Transfers",
             T ~ "CHECK ME!")
           )

 # write_csv(exp_long, "expenditures_recoded_long_FY22.csv")
 # write_csv(rev_long, "revenue_recoded_long_FY22.csv")

aggregated_totals_long <- rbind(rev_long, exp_long)
aggregated_totals_long
year_totals <- aggregated_totals_long %>% 
  group_by(type, Year) %>% 
  summarize(Dollars = sum(Dollars, na.rm = TRUE)) %>% 
  pivot_wider(names_from = "type", values_from = Dollars) %>% 
  rename(
         Expenditures = exp,
         Revenue = rev) %>%  
  mutate(Gap = Revenue - Expenditures) %>% 
  arrange(desc(Year))
# creates variable for the Gap each year

year_totals
# write_csv(aggregated_totals_long, "aggregated_totals.csv")

Graphs

Graphs made from aggregated_totals_long dataframe.

year_totals %>%  
  ggplot() +
  # geom_smooth adds regression line, graphed first so it appears behind line graph
  geom_smooth(aes(x = Year, y = Revenue), color = "light green", method = "lm", se = FALSE) + 
  geom_smooth(aes(x = Year, y = Expenditures), color = "gray", method = "lm", se = FALSE) +
  
  # line graph of revenue and expenditures
  geom_line(aes(x = Year, y = Revenue), color = "green4") +
  geom_line(aes(x = Year, y = Expenditures), color = "black") +
  
  # labels
    theme_bw() +
  scale_y_continuous(labels = comma)+
  xlab("Year") + 
  ylab("Millions of Dollars")  +
  ggtitle("Illinois Expenditures and Revenue Totals, 1998-2022")

Expenditure and revenue amounts in millions of dollars:

exp_long %>%
  filter(Year == 2022) %>%
  #mutate(`Total Expenditures`= sum(Dollars, na.rm = TRUE)) %>%
 # select(-c(Year, `Total Expenditures`)) %>%
  arrange(desc(`Dollars`)) %>%
  ggplot() + 
  geom_col(aes(x = fct_reorder(Category_name, `Dollars`), y = `Dollars`))+ 
  coord_flip() +
      theme_bw() +
  labs(title = "Expenditures for FY2022") +
    xlab("Expenditure Categories") +
  ylab("Millions of Dollars") 

rev_long %>%
  filter(Year == 2022) %>%
  #mutate(`Total Expenditures`= sum(Dollars, na.rm = TRUE)) %>%
 # select(-c(Year, `Total Expenditures`)) %>%
  arrange(desc(`Dollars`)) %>%
  ggplot() + 
  geom_col(aes(x = fct_reorder(Category_name, `Dollars`), y = `Dollars`))+ 
  coord_flip() +
    theme_bw() +
      labs(title = "Revenues for FY2022")+
    xlab("Revenue Categories") +
  ylab("Millions of Dollars") 

Expenditure and revenues when focusing on largest categories and combining others into “All Other Expenditures(Revenues)”:

exp_long %>%
  filter( Year == 2022) %>%
  mutate(rank = rank(Dollars),
        Category_name = ifelse(rank > 13, Category_name, 'All Other Expenditures')) %>%
 # select(-c(Year, Dollars, rank)) %>%
  arrange(desc(Dollars)) %>%
  ggplot() + 
  geom_col(aes(x = fct_reorder(Category_name, `Dollars`), y = `Dollars`), fill = "light green")+ 
  coord_flip() +
      theme_bw() +
    labs(title = "Expenditures for FY2022") +
    xlab("") +
  ylab("Millions of Dollars")

rev_long %>%
  filter( Year == 2022) %>%
  mutate(rank = rank(Dollars),
        Category_name = ifelse(rank > 10, Category_name, 'All Other Revenue Sources')) %>%
  arrange(desc(Dollars)) %>%
  ggplot() + 
  geom_col(aes(x = fct_reorder(Category_name, `Dollars`), y = `Dollars`), fill = "light blue")+ 
  coord_flip() +
      theme_bw() +
    labs(title = "Revenues for FY2022") +
    xlab("") +
  ylab("Millions of Dollars")

CAGR / Growth

Each year, you will need to update the CAGR formulas!

calc_cagr is a function created for calculating the CAGRs for different spans of time.

# function for calculating the CAGR
calc_cagr <- function(df, n) {
  df <- exp_long %>%
    select(-type) %>%
    arrange(Category_name, Year) %>%
    group_by(Category_name) %>%
    mutate(cagr = ((`Dollars` / lag(`Dollars`, n)) ^ (1 / n)) - 1)

  return(df)
}

# This works for one variable at a time
cagr_24 <- calc_cagr(exp_long, 24) %>% 
  # group_by(Category) %>%
  summarize(cagr_24 = round(sum(cagr*100, na.rm = TRUE), 2))

cagr23_precovid <- exp_long %>%
  filter(Year <= 2019) %>%
  calc_cagr(21) %>% 
  summarize(cagr_21 = round(sum(cagr*100, na.rm = TRUE), 2))



cagr_10 <- calc_cagr(exp_long, 10) %>% 
  filter(Year == 2022) %>%
  summarize(cagr_10 = case_when(Year == 2022 ~ round(sum(cagr*100, na.rm = TRUE), 2)))

cagr_5 <- calc_cagr(exp_long, 5) %>% 
  filter(Year == 2022) %>%
  summarize(cagr_5 = case_when(Year == 2022 ~ round(sum(cagr*100, na.rm = TRUE), 2)))

cagr_3 <- calc_cagr(exp_long, 3) %>% 
  filter(Year == 2022) %>%
  summarize(cagr_3 = case_when(Year == 2022 ~ round(sum(cagr*100, na.rm = TRUE), 2)))

cagr_2 <- calc_cagr(exp_long, 2) %>% 
  filter(Year == 2022) %>%
  summarize(cagr_2 = case_when(Year == 2022 ~ round(sum(cagr*100, na.rm = TRUE), 2)))

cagr_1 <- calc_cagr(exp_long, 1) %>% 
  filter(Year == 2022) %>%
  summarize(cagr_1 = case_when(Year == 2022 ~ round(sum(cagr*100, na.rm = TRUE), 2)))

CAGR_expenditures_summary <- data.frame(cagr_1, cagr_2, cagr_3, cagr_5, cagr_10, cagr_24 ) %>% 
  select(-c(Category_name.1, Category_name.2, Category_name.3, Category_name.4, Category_name.5 )) %>% 
  rename("Expenditure Category" = Category_name, "1 Year CAGR" = cagr_1, "2 Year CAGR" = cagr_2, "3 Year CAGR" = cagr_3, "5 Year CAGR" = cagr_5, "10 Year CAGR" = cagr_10,"24 Year CAGR" = cagr_24 )

CAGR_expenditures_summary %>% 
  kbl(caption = "CAGR Calculations for Expenditure Categories") %>% 
  kable_styling(bootstrap_options = c("striped"))
CAGR Calculations for Expenditure Categories
Expenditure Category 1 Year CAGR 2 Year CAGR 3 Year CAGR 5 Year CAGR 10 Year CAGR 24 Year CAGR
AGING 2.69 5.00 5.94 -1.34 3.97 7.34
AGRICULTURE 35.37 11.62 5.95 5.26 2.46 0.72
BUS & PROFESSION REGULATION 8.34 4.64 2.70 1.02 -3.10 0.21
CAPITAL IMPROVEMENT -6.83 17.35 18.29 10.73 -3.77 2.07
CENTRAL MANAGEMENT -2.75 6.11 12.66 2.75 4.85 5.06
CHILDREN AND FAMILY SERVICES -3.90 0.55 2.79 3.08 0.51 -0.12
COMMERCE AND ECONOMIC OPPORTUNITY -25.67 50.78 34.76 16.78 3.24 4.75
CORRECTIONS -6.66 -0.99 -1.61 3.35 1.65 1.89
DEBT SERVICE -0.83 1.59 -0.70 1.65 1.19 6.11
ELECTED OFFICERS 4.77 5.02 1.95 5.89 3.83 3.75
EMPLOYMENT SECURITY -16.09 7.77 7.46 7.21 3.12 2.11
ENVIRONMENTAL PROTECT AGENCY -3.33 -5.02 -8.45 -6.96 -0.02 3.06
HEALTHCARE & FAM SER NET OF MEDICAID -5.67 3.07 -10.65 -0.95 -0.40 4.52
HUMAN SERVICES 11.16 10.19 9.01 6.13 3.29 2.62
IL COMMUNITY COLLEGE BOARD -2.90 0.29 3.89 -1.70 -0.22 1.38
IL STATE TOLL HIGHWAY AUTH 7.06 4.69 6.28 3.57 11.64 7.54
IL STUDENT ASSISTANCE COMM 3.37 -0.84 2.09 -3.04 -1.38 0.89
JUDICIAL -2.28 2.98 6.79 3.73 2.72 2.77
K-12 EDUCATION 9.87 8.79 7.95 6.52 4.10 4.13
LEGISLATIVE 19.56 11.86 10.73 7.38 2.39 3.20
Local Govt Transfers 44.14 26.60 16.64 9.88 6.39 4.65
MEDICAID 9.04 13.38 14.62 9.92 8.89 7.20
NATURAL RESOURCES -0.37 1.58 0.48 4.51 2.52 1.33
OTHER BOARDS & COMMISSIONS -1.48 7.80 2.01 2.18 -3.28 3.92
OTHER DEPARTMENTS -8.53 12.35 -0.39 1.75 0.93 2.96
PUBLIC HEALTH -11.33 22.75 24.71 17.86 7.50 7.14
PUBLIC SAFETY -14.34 7.63 19.57 15.99 8.16 5.96
REVENUE 31.81 40.59 55.83 36.04 16.28 7.18
State Employee Healthcare 4.20 -1.18 -2.88 -2.87 2.14 6.19
STATE PENSION CONTRIBUTION 10.42 8.46 8.92 7.53 8.80 10.70
TRANSPORTATION -20.02 2.89 8.05 0.38 -0.57 3.10
UNIVERSITY EDUCATION 4.36 3.06 3.64 0.17 -0.93 -0.13
# to have it as a csv, uncomment the line below
#write_csv(CAGR_expenditures_summary, "CAGR_expenditures_summary.csv")
calc_cagr <- function(df, n) {
  df <- rev_long %>%
    arrange(Category_name, Year) %>%
    group_by(Category_name) %>%
    mutate(cagr = ((Dollars / lag(Dollars, n)) ^ (1 / n)) - 1)

  return(df)
}

# This works for one variable at a time
cagr_24 <- calc_cagr(rev_long, 24) %>% 
     # group_by(Category) %>%
  summarize(cagr_24 = round(sum(cagr*100, na.rm = TRUE), 2))

cagr_10 <- calc_cagr(rev_long, 10) %>% 
  filter(Year == 2022) %>%
  summarize(cagr_10 = case_when(Year == 2022 ~ round(sum(cagr*100, na.rm = TRUE), 2)))

cagr_5 <- calc_cagr(rev_long, 5) %>% 
  filter(Year == 2022) %>%
  summarize(cagr_5 = case_when(Year == 2022 ~ round(sum(cagr*100, na.rm = TRUE), 2)))

cagr_3 <- calc_cagr(rev_long, 3) %>% 
  filter(Year == 2022) %>%
  summarize(cagr_3 = case_when(Year == 2022 ~ round(sum(cagr*100, na.rm = TRUE), 2)))

cagr_2 <- calc_cagr(rev_long, 2) %>% 
  filter(Year == 2022) %>%
  summarize(cagr_2 = case_when(Year == 2022 ~ round(sum(cagr*100, na.rm = TRUE), 2)))

 cagr_1 <- calc_cagr(rev_long, 1) %>% 
  filter(Year == 2022) %>%
  summarize(cagr_1 = case_when(Year == 2022 ~ round(sum(cagr*100, na.rm = TRUE), 2)))

CAGR_revenue_summary <- data.frame(cagr_1, cagr_2, cagr_3, cagr_5, cagr_10, cagr_24) %>%   
  select(-c(Category_name.1, Category_name.2, Category_name.3, Category_name.4, Category_name.5 )) %>% 
  rename("Revenue Category" = Category_name, "1 Year CAGR" = cagr_1, "2 Year CAGR" = cagr_2, "3 Year CAGR" = cagr_3, "5 Year CAGR" = cagr_5, "10 Year CAGR" = cagr_10,"24 Year CAGR" = cagr_24 )

CAGR_revenue_summary %>% 
  kbl(caption = "CAGR Calculations for Revenue Sources") %>% 
  kable_styling(bootstrap_options = c("striped"))
CAGR Calculations for Revenue Sources
Revenue Category 1 Year CAGR 2 Year CAGR 3 Year CAGR 5 Year CAGR 10 Year CAGR 24 Year CAGR
ALL OTHER SOURCES 46.48 13.20 14.88 8.83 6.61 4.38
CIGARETTE TAXES -8.25 -0.54 3.02 1.49 3.33 2.51
CORP FRANCHISE TAXES & FEES -32.40 1.21 -4.37 0.85 1.18 2.55
CORPORATE INCOME TAXES, gross of PPRT, net of refunds 76.66 72.77 38.19 32.31 13.59 7.70
FEDERAL MEDICAID 8.48 17.30 16.43 12.76 11.30 7.52
FEDERAL OTHER 114.47 42.66 49.24 27.19 11.91 7.17
FEDERAL TRANSPORTATION -22.95 1.39 10.40 -2.73 -0.06 3.33
GIFTS AND BEQUESTS 23.76 42.11 18.49 10.46 10.65 11.43
INDIVIDUAL INCOME TAXES, gross of local, net of refunds 12.60 16.35 9.25 15.22 5.36 5.68
INHERITANCE TAX 35.98 48.20 16.36 18.47 10.12 3.74
INSURANCE TAXES&FEES&LICENSES, net of refunds -3.42 12.76 5.20 2.79 3.20 6.56
LICENSES, FEES & REGISTRATIONS -4.68 15.06 16.83 9.26 6.23 7.87
LIQUOR GALLONAGE TAXES 2.53 2.81 2.49 1.69 1.37 7.45
LOTTERY RECEIPTS -6.17 9.62 1.63 2.27 0.90 2.15
MEDICAL PROVIDER ASSESSMENTS -1.98 3.67 16.26 11.80 8.33 8.36
MOTOR FUEL TAX, gross of local share, net of refunds 6.12 4.36 23.16 13.42 6.98 2.78
MOTOR VEHICLE AND OPERATORS -5.59 4.66 -0.04 0.15 0.64 3.21
OTHER TAXES 63.89 32.74 17.36 13.92 17.13 7.87
PUBLIC UTILITY TAXES, gross of PPRT 3.09 -0.43 -1.43 0.22 -0.48 0.70
RECEIPTS FROM REVENUE PRODUCNG 3.01 4.78 -2.68 1.45 3.49 5.07
RIVERBOAT WAGERING TAXES 80.77 -1.03 -8.90 -6.18 -4.20 1.75
SALES TAXES, gross of local share 11.29 12.22 7.40 6.27 4.43 3.23
# to have it as a csv, uncomment the line below
#write_csv(CAGR_revenue_summary, "CAGR_revenue_summary.csv")

rm(cagr_1, cagr_2, cagr_3, cagr_5, cagr_10, cagr_24)

Expenditure and Revenue Growth using a lag formula:

 exp_long %>% 
  group_by(Category_name) %>% 
  mutate(Growth = ((Dollars) - lag(Dollars))/lag(Dollars) *100) %>% 
  summarize(Growth = round(mean(Growth, na.rm = TRUE), 2))
 rev_long %>% 
  group_by(Category_name) %>% 
  mutate(Growth = ((Dollars) - lag(Dollars))/lag(Dollars) *100) %>% 
  summarize(Growth = round(mean(Growth, na.rm = TRUE), 2))

Change from Previous Year

revenue_change <- rev_long %>%
  select(-c(type,Category)) %>%
  filter(Year > 2020) %>%
  pivot_wider(names_from = Year , values_from = Dollars,   names_prefix = "Dollars_") %>%
  mutate(
    "FY 2022 Revenues ($ billions)" = round(Dollars_2022/1000, digits = 1),
#    "Change from 2021 to 2022" = round(Dollars_2022 - Dollars_2021, digits = 2),
         "Percent Change from 2021 to 2022" = round(((Dollars_2022 -Dollars_2021)/Dollars_2021*100), digits = 2)) %>%
  left_join(CAGR_revenue_summary, by = c("Category_name" = "Revenue Category")) %>% 
    arrange(-`FY 2022 Revenues ($ billions)`)%>%
  #select(-c(Dollars_2021, Dollars_2021, `1 Year CAGR`:`10 Year CAGR`)) %>%
  rename( "Compound Annual Growth, 1998-2022*" = `24 Year CAGR`, 
          "FY2022 Revenue Category" = Category_name ) %>%
  select(-c(Dollars_2021, Dollars_2022, `1 Year CAGR`:`10 Year CAGR`)) 

revenue_change %>% 
  kbl(caption = "Yearly Change in Revenue") %>% 
  kable_styling(bootstrap_options = c("striped"))
Yearly Change in Revenue
FY2022 Revenue Category FY 2022 Revenues ($ billions) Percent Change from 2021 to 2022 Compound Annual Growth, 1998-2022*
INDIVIDUAL INCOME TAXES, gross of local, net of refunds 23.8 12.60 5.68
FEDERAL OTHER 19.8 114.47 7.17
FEDERAL MEDICAID 19.0 8.48 7.52
SALES TAXES, gross of local share 15.4 11.29 3.23
CORPORATE INCOME TAXES, gross of PPRT, net of refunds 9.7 76.66 7.70
MEDICAL PROVIDER ASSESSMENTS 3.7 -1.98 8.36
MOTOR FUEL TAX, gross of local share, net of refunds 2.5 6.12 2.78
RECEIPTS FROM REVENUE PRODUCNG 2.4 3.01 5.07
ALL OTHER SOURCES 2.3 46.48 4.38
LICENSES, FEES & REGISTRATIONS 1.9 -4.68 7.87
GIFTS AND BEQUESTS 1.9 23.76 11.43
FEDERAL TRANSPORTATION 1.8 -22.95 3.33
MOTOR VEHICLE AND OPERATORS 1.6 -5.59 3.21
PUBLIC UTILITY TAXES, gross of PPRT 1.4 3.09 0.70
LOTTERY RECEIPTS 1.4 -6.17 2.15
OTHER TAXES 1.4 63.89 7.87
CIGARETTE TAXES 0.8 -8.25 2.51
INHERITANCE TAX 0.6 35.98 3.74
INSURANCE TAXES&FEES&LICENSES, net of refunds 0.6 -3.42 6.56
LIQUOR GALLONAGE TAXES 0.3 2.53 7.45
RIVERBOAT WAGERING TAXES 0.3 80.77 1.75
CORP FRANCHISE TAXES & FEES 0.2 -32.40 2.55
expenditure_change <- exp_long %>%
  select(-c(type,Category)) %>%
  filter(Year > 2020) %>%
  pivot_wider(names_from = Year , values_from = Dollars,   names_prefix = "Dollars_") %>%
  mutate("FY 2022 Expenditures ($ billions)" = round(Dollars_2022/1000, digits = 1),
  #  "Change from 2021 to 2022" = Dollars_2022 - Dollars_2021,
         "Percent Change from 2021 to 2022" = round((Dollars_2022 -Dollars_2021)/Dollars_2021*100, digits = 2) )%>%
  left_join(CAGR_expenditures_summary, by = c("Category_name" = "Expenditure Category")) %>% 
  arrange(-`FY 2022 Expenditures ($ billions)`)%>%
  select(-c(Dollars_2022, Dollars_2021, `1 Year CAGR`:`10 Year CAGR`)) %>%
  rename( "Compound Annual Growth, 1998-2022*" = `24 Year CAGR`, 
          "FY2022 Expenditure Category" = Category_name )

expenditure_change %>% 
  kbl(caption = "Yearly Change in Expenditures") %>% 
  kable_styling(bootstrap_options = c("striped"))
Yearly Change in Expenditures
FY2022 Expenditure Category FY 2022 Expenditures ($ billions) Percent Change from 2021 to 2022 Compound Annual Growth, 1998-2022*
MEDICAID 28.7 9.04 7.20
K-12 EDUCATION 13.4 9.87 4.13
STATE PENSION CONTRIBUTION 10.5 10.42 10.70
Local Govt Transfers 10.3 44.14 4.65
HUMAN SERVICES 7.2 11.16 2.62
TRANSPORTATION 4.1 -20.02 3.10
State Employee Healthcare 2.6 4.20 6.19
REVENUE 2.2 31.81 7.18
IL STATE TOLL HIGHWAY AUTH 2.1 7.06 7.54
DEBT SERVICE 2.0 -0.83 6.11
PUBLIC SAFETY 1.7 -14.34 5.96
CORRECTIONS 1.5 -6.66 1.89
COMMERCE AND ECONOMIC OPPORTUNITY 1.4 -25.67 4.75
CHILDREN AND FAMILY SERVICES 1.3 -3.90 -0.12
UNIVERSITY EDUCATION 1.3 4.36 -0.13
AGING 1.2 2.69 7.34
CENTRAL MANAGEMENT 1.2 -2.75 5.06
ELECTED OFFICERS 1.0 4.77 3.75
PUBLIC HEALTH 0.8 -11.33 7.14
OTHER DEPARTMENTS 0.8 -8.53 2.96
ENVIRONMENTAL PROTECT AGENCY 0.6 -3.33 3.06
IL STUDENT ASSISTANCE COMM 0.6 3.37 0.89
JUDICIAL 0.5 -2.28 2.77
IL COMMUNITY COLLEGE BOARD 0.4 -2.90 1.38
CAPITAL IMPROVEMENT 0.4 -6.83 2.07
NATURAL RESOURCES 0.3 -0.37 1.33
EMPLOYMENT SECURITY 0.3 -16.09 2.11
HEALTHCARE & FAM SER NET OF MEDICAID 0.3 -5.67 4.52
BUS & PROFESSION REGULATION 0.2 8.34 0.21
OTHER BOARDS & COMMISSIONS 0.2 -1.48 3.92
AGRICULTURE 0.1 35.37 0.72
LEGISLATIVE 0.1 19.56 3.20

Create summary file

Saves main items in one excel file named summary_file.xlsx. Delete eval=FALSE to run on local computer.

#install.packages("openxlsx")
library(openxlsx)

dataset_names <- list('rev_long' = rev_long, 'exp_long' = exp_long, 
                      `Table 1` = expenditure_change, `Table 2` = revenue_change,
                      'Table 4.a' = CAGR_revenue_summary, 'Table 4.b' = CAGR_expenditures_summary, 
                      'year_totals' = year_totals)

#write.xlsx(dataset_names, file = 'summary_file_FY2022.xlsx')